{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import xgboost as xgb\n",
    "from xgboost.sklearn import XGBClassifier\n",
    "from sklearn import  metrics   #Additional scklearn functions\n",
    "from sklearn.model_selection import GridSearchCV   #Perforing grid search\n",
    " \n",
    "import matplotlib.pylab as plt\n",
    "%matplotlib inline\n",
    "from matplotlib.pylab import rcParams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Salary_Account</th>\n",
       "      <th>Mobile_Verified</th>\n",
       "      <th>...</th>\n",
       "      <th>Loan_Tenure_Submitted</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>78</td>\n",
       "      <td>2</td>\n",
       "      <td>300000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>35000</td>\n",
       "      <td>85</td>\n",
       "      <td>2</td>\n",
       "      <td>200000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6762.9</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>22500</td>\n",
       "      <td>81</td>\n",
       "      <td>2</td>\n",
       "      <td>600000.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>35000</td>\n",
       "      <td>87</td>\n",
       "      <td>2</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>100000</td>\n",
       "      <td>84</td>\n",
       "      <td>2</td>\n",
       "      <td>500000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Gender  City  Monthly_Income  DOB  Lead_Creation_Date  Loan_Amount_Applied  \\\n",
       "0       0     1           20000   78                   2             300000.0   \n",
       "1       1     1           35000   85                   2             200000.0   \n",
       "2       1     3           22500   81                   2             600000.0   \n",
       "3       1     3           35000   87                   2            1000000.0   \n",
       "4       1     1          100000   84                   2             500000.0   \n",
       "\n",
       "   Loan_Tenure_Applied  Existing_EMI  Salary_Account  Mobile_Verified  \\\n",
       "0                  5.0           0.0              21                0   \n",
       "1                  2.0           0.0              23                1   \n",
       "2                  4.0           0.0              45                1   \n",
       "3                  5.0           0.0              45                1   \n",
       "4                  2.0       25000.0              21                1   \n",
       "\n",
       "     ...      Loan_Tenure_Submitted  Interest_Rate  Processing_Fee  \\\n",
       "0    ...                        0.0           0.00             0.0   \n",
       "1    ...                        2.0          13.25             0.0   \n",
       "2    ...                        4.0           0.00             0.0   \n",
       "3    ...                        5.0           0.00             0.0   \n",
       "4    ...                        2.0           0.00             0.0   \n",
       "\n",
       "   EMI_Loan_Submitted  Filled_Form  Device_Type  Var2  Source  Var4  Disbursed  \n",
       "0                 0.0            0            1     6       0     1          0  \n",
       "1              6762.9            0            1     6       0     3          0  \n",
       "2                 0.0            0            1     1      16     1          0  \n",
       "3                 0.0            0            1     1      16     3          0  \n",
       "4                 0.0            0            1     1       8     3          0  \n",
       "\n",
       "[5 rows x 23 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv(\"HappyBank_train_org.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 87019 entries, 0 to 87018\n",
      "Data columns (total 23 columns):\n",
      "Gender                   87019 non-null int64\n",
      "City                     87019 non-null int64\n",
      "Monthly_Income           87019 non-null int64\n",
      "DOB                      87019 non-null int64\n",
      "Lead_Creation_Date       87019 non-null int64\n",
      "Loan_Amount_Applied      87019 non-null float64\n",
      "Loan_Tenure_Applied      87019 non-null float64\n",
      "Existing_EMI             87019 non-null float64\n",
      "Salary_Account           87019 non-null int64\n",
      "Mobile_Verified          87019 non-null int64\n",
      "Var5                     87019 non-null int64\n",
      "Var1                     87019 non-null int64\n",
      "Loan_Amount_Submitted    87019 non-null float64\n",
      "Loan_Tenure_Submitted    87019 non-null float64\n",
      "Interest_Rate            87019 non-null float64\n",
      "Processing_Fee           87019 non-null float64\n",
      "EMI_Loan_Submitted       87019 non-null float64\n",
      "Filled_Form              87019 non-null int64\n",
      "Device_Type              87019 non-null int64\n",
      "Var2                     87019 non-null int64\n",
      "Source                   87019 non-null int64\n",
      "Var4                     87019 non-null int64\n",
      "Disbursed                87019 non-null int64\n",
      "dtypes: float64(8), int64(15)\n",
      "memory usage: 15.3 MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "先写好一个函数，创建XGBoost模型并执行交叉验证"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def modelfit(alg, dtrain, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50):\n",
    "    if useTrainCV:\n",
    "        xgb_param = alg.get_xgb_params()\n",
    "        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain['Disbursed'].values)\n",
    "        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,\n",
    "            metrics='auc', early_stopping_rounds=early_stopping_rounds)\n",
    "        alg.set_params(n_estimators=cvresult.shape[0])\n",
    " \n",
    "    #Fit the algorithm on the data\n",
    "    alg.fit(dtrain[predictors], dtrain['Disbursed'],eval_metric='auc')\n",
    " \n",
    "    #Predict training set:\n",
    "    dtrain_predictions = alg.predict(dtrain[predictors])\n",
    "    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:,1]\n",
    " \n",
    "    #Print model report:\n",
    "    print (\"\\nModel Report\")\n",
    "    print('best n_estimators:' ,n_estimators)\n",
    "    print (\"Accuracy :\" ,metrics.accuracy_score(dtrain['Disbursed'].values, dtrain_predictions))\n",
    "    print (\"AUC Score (Train):\" ,metrics.roc_auc_score(dtrain['Disbursed'], dtrain_predprob))\n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.先调n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Model Report\n",
      "best n_estimators: 134\n",
      "Accuracy : 0.9854514531309254\n",
      "AUC Score (Train): 0.9161957202046292\n"
     ]
    }
   ],
   "source": [
    "predictors = [x for x in train.columns if x not in ['Disbursed']]\n",
    "xgb1 = XGBClassifier(\n",
    " learning_rate =0.1,\n",
    " n_estimators=1000,\n",
    " max_depth=5,\n",
    " min_child_weight=1,\n",
    " gamma=0,\n",
    " subsample=0.8,\n",
    " colsample_bytree=0.8,\n",
    " objective= 'binary:logistic',\n",
    " nthread=4,\n",
    " scale_pos_weight=1,\n",
    " seed=27)\n",
    "modelfit(xgb1, train, predictors)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 最终结果是134，然后调max_depth，min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'max_depth': 5, 'min_child_weight': 1}, 0.8492752519665894)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_test1 = {\n",
    " 'max_depth':range(3,10,2),\n",
    " 'min_child_weight':range(1,6,2)\n",
    "}\n",
    "gsearch1 = GridSearchCV(estimator = XGBClassifier( learning_rate =0.1, n_estimators=134, max_depth=5,\n",
    " min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,\n",
    " objective= 'binary:logistic', nthread=4, scale_pos_weight=1, seed=27), \n",
    " param_grid = param_test1, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\n",
    "gsearch1.fit(train[predictors],train['Disbursed'])\n",
    "gsearch1.best_params_, gsearch1.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "这里看到结果是5和1,5的话是中间值，而1已经是最小值了，不需要再调了"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. 调整gamma值，max_depth=5，min_child_weight=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'gamma': 0.3}, 0.849414497377395)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_test2 = { \n",
    " 'gamma':[i/10.0 for i in range(0,5)]\n",
    "}\n",
    "gsearch2 = GridSearchCV(estimator = XGBClassifier( learning_rate =0.1, n_estimators=134, max_depth=5,\n",
    " min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,\n",
    " objective= 'binary:logistic', nthread=4, scale_pos_weight=1, seed=27), \n",
    " param_grid = param_test2, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\n",
    "gsearch2.fit(train[predictors],train['Disbursed'])\n",
    "gsearch2.best_params_, gsearch2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. 调整subsample和colsample_bytree，gamma=0.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'colsample_bytree': 0.9, 'subsample': 0.8}, 0.8504939283909885)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_test3 = { \n",
    " 'subsample':[i/10.0 for i in range(6,10)],\n",
    " 'colsample_bytree':[i/10.0 for i in range(6,10)]\n",
    "\n",
    "}\n",
    "gsearch3 = GridSearchCV(estimator = XGBClassifier( learning_rate =0.1, n_estimators=134, max_depth=5,\n",
    " min_child_weight=1, gamma=0.3, subsample=0.8, colsample_bytree=0.8,\n",
    " objective= 'binary:logistic', nthread=4, scale_pos_weight=1, seed=27), \n",
    " param_grid = param_test3, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\n",
    "gsearch3.fit(train[predictors],train['Disbursed'])\n",
    "gsearch3.best_params_, gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5. 调整reg_alpha/reg_lambda，colsample_bytree=0.9，subsample=0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'reg_alpha': 1e-05}, 0.8504939513486413)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_test4 = { \n",
    " 'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100]\n",
    "}\n",
    "gsearch4 = GridSearchCV(estimator = XGBClassifier( learning_rate =0.1, n_estimators=134, max_depth=5,\n",
    " min_child_weight=1, gamma=0.3, subsample=0.8, colsample_bytree=0.9,\n",
    " objective= 'binary:logistic', nthread=4, scale_pos_weight=1, seed=27), \n",
    " param_grid = param_test4, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\n",
    "gsearch4.fit(train[predictors],train['Disbursed'])\n",
    "gsearch4.best_params_, gsearch4.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'reg_lambda': 1}, 0.8504939283909885)"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_test4 = { \n",
    " 'reg_lambda':[1e-5, 1e-2, 0.1, 1, 100]\n",
    "}\n",
    "gsearch4 = GridSearchCV(estimator = XGBClassifier( learning_rate =0.1, n_estimators=134, max_depth=5,\n",
    " min_child_weight=1, gamma=0.3, subsample=0.8, colsample_bytree=0.9,\n",
    " objective= 'binary:logistic', nthread=4, scale_pos_weight=1, seed=27), \n",
    " param_grid = param_test4, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\n",
    "gsearch4.fit(train[predictors],train['Disbursed'])\n",
    "gsearch4.best_params_, gsearch4.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "加入正则后分数在亿分位分数上升，所以就不加了"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 6. 最后降低学习率，增加更多的树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Model Report\n",
      "best n_estimators: 1322\n",
      "Accuracy : 0.9853939944150129\n",
      "AUC Score (Train): 0.9144461109483757\n"
     ]
    }
   ],
   "source": [
    "xgb2 = XGBClassifier(\n",
    " learning_rate =0.01,\n",
    " n_estimators=5000,\n",
    " max_depth=5,\n",
    " min_child_weight=1,\n",
    " gamma=0.3,\n",
    " subsample=0.9,\n",
    " colsample_bytree=0.8,\n",
    " reg_alpha=1e-05,\n",
    " objective= 'binary:logistic',\n",
    " nthread=4,\n",
    " scale_pos_weight=1,\n",
    " seed=27)\n",
    "modelfit(xgb2, train, predictors)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 最后用所有的最佳参数来训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "       colsample_bytree=0.8, gamma=0, learning_rate=0.01, max_delta_step=0,\n",
       "       max_depth=5, min_child_weight=1, missing=None, n_estimators=1322,\n",
       "       n_jobs=1, nthread=4, objective='binary:logistic', random_state=0,\n",
       "       reg_alpha=1e-05, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
       "       silent=True, subsample=0.9)"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = {'learning_rate': 0.01,\n",
    "          'n_estimators': 1322, \n",
    "          'min_child_weight': 1,\n",
    "          'max_depth': 5,\n",
    "          'subsample': 0.9,\n",
    "          'colsample_bytree': 0.8,\n",
    "          'reg_alpha':1e-05,\n",
    "          'objective':'binary:logistic',\n",
    "          'nthread': 4\n",
    "         }\n",
    "\n",
    "xgb_g = XGBClassifier(**params)\n",
    "y_train = train['Disbursed']\n",
    "X_train = train.drop([\"Disbursed\"], axis=1)\n",
    "xgb_g.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 重要性特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\"columns\":list(X_train.columns), \"importance\":list(xgb_g.feature_importances_.T)})\n",
    "df = df.sort_values(by=['importance'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Monthly_Income</td>\n",
       "      <td>0.169004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Existing_EMI</td>\n",
       "      <td>0.085211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Salary_Account</td>\n",
       "      <td>0.084483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>DOB</td>\n",
       "      <td>0.074981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Var5</td>\n",
       "      <td>0.070345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Source</td>\n",
       "      <td>0.060230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>EMI_Loan_Submitted</td>\n",
       "      <td>0.059617</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Interest_Rate</td>\n",
       "      <td>0.056054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Loan_Amount_Applied</td>\n",
       "      <td>0.051839</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Loan_Amount_Submitted</td>\n",
       "      <td>0.050230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Processing_Fee</td>\n",
       "      <td>0.043602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Var1</td>\n",
       "      <td>0.041188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>City</td>\n",
       "      <td>0.033946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Var4</td>\n",
       "      <td>0.025134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Loan_Tenure_Applied</td>\n",
       "      <td>0.021992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Loan_Tenure_Submitted</td>\n",
       "      <td>0.015556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Var2</td>\n",
       "      <td>0.015556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Filled_Form</td>\n",
       "      <td>0.015249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Lead_Creation_Date</td>\n",
       "      <td>0.011724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Gender</td>\n",
       "      <td>0.006973</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Device_Type</td>\n",
       "      <td>0.004981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Mobile_Verified</td>\n",
       "      <td>0.002107</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  columns  importance\n",
       "2          Monthly_Income    0.169004\n",
       "7            Existing_EMI    0.085211\n",
       "8          Salary_Account    0.084483\n",
       "3                     DOB    0.074981\n",
       "10                   Var5    0.070345\n",
       "20                 Source    0.060230\n",
       "16     EMI_Loan_Submitted    0.059617\n",
       "14          Interest_Rate    0.056054\n",
       "5     Loan_Amount_Applied    0.051839\n",
       "12  Loan_Amount_Submitted    0.050230\n",
       "15         Processing_Fee    0.043602\n",
       "11                   Var1    0.041188\n",
       "1                    City    0.033946\n",
       "21                   Var4    0.025134\n",
       "6     Loan_Tenure_Applied    0.021992\n",
       "13  Loan_Tenure_Submitted    0.015556\n",
       "19                   Var2    0.015556\n",
       "17            Filled_Form    0.015249\n",
       "4      Lead_Creation_Date    0.011724\n",
       "0                  Gender    0.006973\n",
       "18            Device_Type    0.004981\n",
       "9         Mobile_Verified    0.002107"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
